rm(list = ls())

if (!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse, rdrobust, readxl, parallel, pbapply, conflicted)

conflicts_prefer(dplyr::filter)
conflicts_prefer(dplyr::select)

# Source helper function to tidy rdrobust results
source("code/helper_function.R")

# Helper function to drop missings

drop_missings_df <- function(df, varlist) {
  df <- df[complete.cases(df[, varlist]), ]
  df
}

# Load and prepare data
mz <- read_rds("data_not_upload/mz_1996_2016_clean.rds") %>%
  filter(east == 1) %>%
  filter(!is.na(yob), between(yob, 1960, 1980)) %>%
  mutate(
    state_id = as.numeric(state_id),
    year = as.numeric(year),
    gender = ifelse(male == 1, "Men", "Women")
  )

# Create state dummies
mz <- mz %>%
  fastDummies::dummy_cols(
    select_columns = "state_id",
    remove_first_dummy = TRUE
  )

# Define parameters
cutoff <- 1971
outcomes <- c("income_net", "employed", "main_income_work", "emp_befristet")

# Create empty rdrobust output for error handling
m_empty <- rdrobust(
  y = runif(100),
  x = runif(100),
  c = 0.5,
  deriv = 1,
  scalepar = 1
)

# Function to run analysis for a specific gender
run_analysis <- function(gender_val) {
  # Pre-filter data once for the gender
  gender_data <- mz %>%
    filter(gender == gender_val) %>%
    select(yob, year, all_of(outcomes))

  # Create a grid of all combinations
  analysis_grid <- expand.grid(
    outcome = outcomes,
    year = unique(gender_data$year),
    stringsAsFactors = FALSE
  )

  # Process all combinations in parallel
  temp <- pblapply(1:nrow(analysis_grid), function(i) {
    o <- analysis_grid$outcome[i]
    year <- analysis_grid$year[i]

    # Filter data for this year
    ss <- gender_data %>% filter(year == !!year)

    # Prepare outcome and drop missings
    ss[, "outcome"] <- ss[, o]
    ss <- ss %>% drop_missings_df(
      varlist =
        c(o, "yob")
    )

    # Calculate outcome SD
    sd_y <- sd(ss$outcome)

    # Run regression
    m1 <- tryCatch(
      rdrobust(
        y = ss$outcome,
        x = ss$yob,
        c = cutoff,
        deriv = 1,
        scalepar = -1,
        p = 1,
      ),
      error = function(cond) {
        return(list(model = m_empty, error = TRUE))
      }
    )

    # Check if there was an error
    has_error <- ifelse(is.list(m1) && "error" %in% names(m1), TRUE, FALSE)
    m1 <- if (has_error) m1$model else m1

    # Process results
    tidy_rd(m1, 3) %>%
      mutate(
        cutoff_y = cutoff,
        model = "sharp",
        covs = 0,
        year_mz = year,
        outcome = o,
        sd_y = sd_y,
        gender = gender_val,
        has_error = has_error
      )
  }) %>%
    reduce(rbind) %>%
    filter(
      cutoff_y == 1971,
      covs == 0
    ) %>%
    mutate(
      p_order = paste0("Polynomial Order = ", p),
      method = "rbc",
      bw_select = "auto",
      east = 1
    ) %>%
    filter(!abs(conf.high / sd_y) > 5)
}

# Run analysis for both genders
results_auto <- map_dfr(c("Men", "Women"), run_analysis) %>%
  filter(has_error == FALSE)

# Load outcome labels
m <- structure(list(outcome = c(
  "income_net", "employed", "main_income_work",
  "emp_befristet"
), label = c(
  "Net income", "Employed", "Income derives mainly from employment",
  "On fixed-term contract"
), order = c(2, 1, 4, 3)), row.names = c(
  NA,
  -4L
), class = c("tbl_df", "tbl", "data.frame"))


# Function to create plot
create_plot <- function(data, gender_val) {
  res_plot <- data %>%
    filter(gender == gender_val) %>%
    left_join(m) %>%
    mutate(
      label = fct_reorder(label, order)
    )

  pd <- position_dodge(0.6)

  ggplot(
    res_plot,
    aes(y = estimate / sd_y, x = year_mz, group = factor(covs))
  ) +
    geom_hline(yintercept = 0, linetype = "dotted") +
    geom_errorbar(
      aes(ymin = conf.low / sd_y, ymax = conf.high / sd_y),
      width = 0,
      position = pd
    ) +
    geom_point(
      size = 2,
      shape = 21,
      fill = "white",
      position = pd
    ) +
    facet_wrap(~label) +
    scale_color_discrete(name = "") +
    theme_bw() +
    ylab("Effect of one additional year of\neducation under autocracy (standard deviations)") +
    xlab("Survey year") +
    theme(legend.position = "bottom") +
    scale_x_continuous(breaks = seq(1996, 2016, 2))
}

# Create plots for both genders
p_men <- create_plot(results_auto, "Men")
p_women <- create_plot(results_auto, "Women")

# Display plots
p_men
p_women
